* Firm organization with multiple establishments
* Appendix A: Data preparation for Tables A.27-A.30

* Session setup: empty the workspace, switch off output paging and (re)open the
* log file of this script.
clear all
* No command visible in this file depends on matsize. -capture- lets the script
* continue on Stata flavours whose matsize ceiling is below 2000, where the
* plain -set matsize 2000- would abort the run.
capture set matsize 2000
set more off

* Close a log that may still be open from an earlier (aborted) run.
capture log close
log using log/42_cmrh-cs_replication_est_data_layer-est.log, replace

********************************************************************************
***	Prepare data to compare establishment types ********************************

* Load the 2012 observations of the panel and list the variables it contains.
use data/Panel.dta if jahr == 2012, clear
describe

* Variables that the analyses below do not use: first the employee
* characteristics, then the firm characteristics.
local unused_employee grund stib frau foreign ein_erw nbr_occ tage_wz tage_occ ///
	l_beruf f_beruf l_tentgelt f_tentgelt d_age
local unused_firm mean_wage verysmall w08_5 grd_dat lzt_dat ao_bula
drop `unused_employee' `unused_firm'

* Move limit, cens and lnw directly behind tentgelt.
order limit cens lnw, after(tentgelt)
	
********************************************************************************
***	Sample restriction *********************************************************
********************************************************************************

*	Minimum firm size: count the non-missing person IDs per firm (untid) and
*	year, and keep only firms whose smallest yearly count is at least 10.
bysort untid jahr: egen empl_unt = count(persnr)
bysort untid (jahr): egen min_empl = min(empl_unt)
quietly drop if min_empl < 10
drop min_empl

*	Cleaning: social security limit. Wages flagged by cens are set to the
*	limit, then the log wage is rebuilt from the cleaned wage.
quietly replace tentgelt = limit if cens == 1
drop limit cens
quietly replace lnw = ln(tentgelt)
*	Report how many log wages are missing after the rebuild.
count if lnw == .

*	Multi-establishment firms: keep only firm-years with more than one
*	establishment.
*	flg_estjhr marks exactly one observation per establishment-year, so its sum
*	within a firm-year is the number of distinct establishments (betnr) observed
*	for that firm (untid) in that year.
egen flg_estjhr = tag(betnr jahr)
bys untid jahr: egen count_est = total(flg_estjhr)

*	The former helper variables (a multi-establishment dummy and a firm-year
*	tag) were never used afterwards and are no longer generated.
keep if count_est > 1

********************************************************************************
***	Establishment characteristics **********************************************
********************************************************************************

* Establishment size: number of non-missing person IDs per establishment-year.
bysort betnr jahr: egen empl_est = count(persnr)

* Attach the establishment-year information from the BHP file; only
* observations found in both data sets are retained.
merge m:1 betnr jahr using data/BHPinclUntID.dta
quietly drop if _merge != 3
drop _merge

* Remove the merged-in variables that are not used below (a single -drop-, so
* that all names are resolved against the same variable list).
drop count_est_siab chge_est entry_unt jahr_eins_final jahr_eins_est ///
	az_vz eintritt betnr_vor untid_vor verysmall matchtype ///
	d_multest_4 ever_multest_4 always_multest_4 vorg_died ///
	mode austritt betnr_nach nachf_willbenew untid_nach ///
	vorgaenger nachfolger erst_jahr_eintritt jahr_lzt_est ///
	first_year_ME entry_dyn exit_dyn exit_unt add_est mode_unt ///
	sector sector2 hq_sector

* Place hq_kreis right behind ao_bula.
order hq_kreis, after(ao_bula)

********************************************************************************
***	Layer classification based on KldB2010, managerial organization ************
********************************************************************************

* Attach the layer classification by occupation code (beruf2010). Codes that
* appear only in the classification file are discarded; observations without a
* match in the classification file stay in the data.
merge m:1 beruf2010 using data/KldB2010_LayerCMRHFriedrich.dta
quietly keep if _merge != 2
drop _merge

* Manual layer assignments for individual occupation codes.
quietly {
	replace layer_neuAG = 3 if beruf2010 == 73294
	replace layer_neuAG = 2 if beruf2010 == 71224
	replace layer_neuAG = 1 if beruf2010 == 1104
	replace layer_neuAG = 0 if inlist(beruf2010, 1402, 1302, 1203)
}

* From here on layer_neuAG is the layer variable; the old layer is discarded.
quietly drop layer
rename layer_neuAG layer


* Rank the layers that are present in each establishment-year from the bottom
* up: 0 = lowest layer present, 1 = next layer present, ... (at most four ranks).
* Each helper below is the group minimum of layer among the observations that
* lie strictly above the previous helper; because of the -if- it is filled in
* only for those observations and is missing for all others.
bys betnr jahr: egen lowest = min(layer)
bys betnr jahr: egen second_lowest = min(layer) if layer > lowest
bys betnr jahr: egen third_lowest = min(layer) if layer > second_lowest
bys betnr jahr: egen fourth_lowest = min(layer) if layer > third_lowest

* Observations with a missing layer must stay unranked. Without the
* !missing(layer) guard they satisfy "layer == helper" whenever the helper is
* missing too (. == . is true), which ranked them as an extra top layer in every
* establishment with fewer than four layers, but not in four-layer ones.
gen layer_rank = .
replace layer_rank = 0 if layer == lowest & !missing(layer)
replace layer_rank = 1 if layer == second_lowest & !missing(layer) & layer_rank == .
replace layer_rank = 2 if layer == third_lowest & !missing(layer) & layer_rank == .
replace layer_rank = 3 if layer == fourth_lowest & !missing(layer) & layer_rank == .
drop lowest *_lowest

* Cross-check of rank against layer, including missing values.
tab2 layer_rank layer, missing

// Number of layers: distinct non-missing layer values per establishment-year
tempvar first_in_layer
egen `first_in_layer' = tag(betnr jahr layer)
bysort betnr jahr: egen count_lay_est = total(`first_in_layer')
drop `first_in_layer'

// Combination of layers: l_0 ... l_3 equal 1 when at least one observation of
// the establishment-year belongs to layer 0 ... 3, and 0 otherwise
quietly {
	forvalues lay = 0/3 {
		bysort betnr jahr: egen l_`lay' = max(layer == `lay')
		label variable l_`lay' "Dummy establishment has layer `lay'"
	}
}

// Encode the four dummies as one number: thousands digit = layer 3, hundreds
// digit = layer 2, tens digit = layer 1, units digit = layer 0
capture drop comb_layer_est
gen comb_layer_est = 1000 * l_3 + 100 * l_2 + 10 * l_1 + l_0
label variable comb_layer_est "Pattern of layers, from high to low, establishment level"
label define structure ///
	1 "0" 10 "1" 100 "2" 1000 "3" ///
	11 "0 + 1" 101 "0 + 2" 1001 "0 + 3" ///
	110 "1 + 2" 1010 "1 + 3" 1100 "2 + 3" ///
	111 "0 + 1 + 2" 1101 "0 + 2 + 3" 1011 "0 + 1 + 3" ///
	1110 "1 + 2 + 3" 1111 "All layers"
label values comb_layer_est structure
drop l_*

//	Number of managerial layers, establishment level: distinct layer ranks per
//	establishment-year, not counting rank 0
tempvar first_in_rank_est
egen `first_in_rank_est' = tag(betnr jahr layer_rank)
bysort betnr jahr: egen count_mgmt_est = total(`first_in_rank_est' & layer_rank != 0)
drop `first_in_rank_est'

//	Number of managerial layers, firm level: same count within firm-year (untid
//	jahr). The result is written to its own file with one row per distinct
//	(betnr, jahr, count_mgmt_unt) combination; the data in memory are restored.
preserve
	tempvar first_in_rank_unt
	egen `first_in_rank_unt' = tag(untid jahr layer_rank)
	bysort untid jahr: egen count_mgmt_unt = total(`first_in_rank_unt' & layer_rank != 0)

	keep betnr jahr count_mgmt_unt
	duplicates drop
	save data/est_count_mgmt_unt_layer-est.dta, replace
restore


********************************************************************************
***	Establishment-level aggregates *********************************************
********************************************************************************

// Other sums: establishment-year totals of wages, education and tenure
bys betnr jahr: egen tot_wages = total(tentgelt)
bys betnr jahr: egen tot_educ = total(d_educ)
bys betnr jahr: egen tot_tenure = total(tage_bet)

// Reduce to establishment-level variables and store them in a separate file;
// the worker-level data in memory are restored afterwards
preserve
keep betnr jahr empl_est tot_* count_lay_est comb_layer_est count_mgmt_est count_est hauptbet

duplicates drop
duplicates report betnr jahr
// The file is merged 1:1 on betnr jahr further below. Stop here, before a
// non-unique file is written, if a kept variable varies within an
// establishment-year. missok: missing key values alone are not treated as an
// error, exactly as in the later merge.
isid betnr jahr, missok
tab jahr, missing
label variable empl_est "# of employees in establishment"
label variable tot_wages "Wage sum in establishment"
label variable tot_educ "Education sum in establishment"
label variable tot_tenure "Tenure sum in establishment"
label variable count_lay_est "# of layers in establishment"
label variable count_mgmt_est "# of mgmt layers in establishment"
describe

save data/estlevel_CMRH_layer-est.dta, replace
restore

********************************************************************************
***	Hierarchy characteristics **************************************************
********************************************************************************

* Totals per establishment-year and layer rank: head count (non-missing person
* IDs) and the sums of wages, education and tenure.
bysort betnr jahr layer_rank: egen empl_lyr = count(persnr)
local src_wage tentgelt
local src_educ d_educ
local src_tenure tage_bet
foreach stat in wage educ tenure {
	bysort betnr jahr layer_rank: egen `stat'_lyr = total(`src_`stat'')
}

* Keep one observation per establishment-year and layer rank. Observations with
* a missing layer_rank are never tagged and therefore leave the data here.
tempvar one_per_rank
egen `one_per_rank' = tag(betnr jahr layer_rank)
keep if `one_per_rank'

* One row per establishment-year, with one set of columns for each layer rank.
keep jahr betnr layer_rank wage_lyr empl_lyr educ_lyr tenure_lyr
reshape wide wage_lyr empl_lyr educ_lyr tenure_lyr, i(jahr betnr) j(layer_rank)

********************************************************************************
***	Combine data ***************************************************************
********************************************************************************

* Attach the establishment-level variables saved earlier in this script;
* rows that exist only in the saved file are discarded.
merge 1:1 betnr jahr using data/estlevel_CMRH_layer-est.dta
keep if _merge != 2
drop _merge

* Tag one row per establishment; any older versions of the tags are removed.
capture drop flg_est
capture drop flg_estjhr
egen flg_est = tag(betnr)

* Declare the panel structure (establishment x year).
xtset, clear
sort betnr jahr
xtset betnr jahr

* Tag one row per establishment-year.
egen flg_estjhr = tag(betnr jahr)

// add number of management layers at firm level
merge 1:1 betnr jahr using data/est_count_mgmt_unt_layer-est.dta
keep if _merge != 2
drop _merge

// add indicator when org structure at firm-level is equal to structure at establishment-level
// NOTE(review): est_count_mgmt_unt.dta is not written by this script; it has to
// exist already when this line runs.
merge 1:1 betnr jahr using data/est_count_mgmt_unt.dta, keepusing(same_rank_unt_est)
keep if _merge != 2
drop _merge

************************************
** Other supporting variables  *****
************************************
* Average wage per employee in the establishment
gen avg_wage = tot_wages / empl_est

* Mark establishments with adjacent (consecutively ordered) layers, i.e. the
* patterns "0", "0 + 1", "0 + 1 + 2" and "All layers"
gen correctlyr = inlist(comb_layer_est, 1, 11, 111, 1111)
*keep if correctlyr==1 	// remove the asterisk and let this instruction be executed to 
						// produce tables with consecutively ordered layers.

* Average wage, education and tenure per employee within each layer rank
foreach stat in wage educ tenure {
	forvalues rank = 0/3 {
		gen avg_`stat'_lyr`rank' = `stat'_lyr`rank' / empl_lyr`rank'
	}
}

* Log employees
gen ln_empl_est = ln(empl_est)

* Log average wage
gen lavgw = ln(avg_wage)

sort betnr jahr

* Normalized employment: for an establishment with `top' managerial layers, the
* employment of every rank from 0 to `top' is divided by the employment of rank
* `top'; ranks above `top' stay missing.
forvalues rank = 0/3 {
	gen norm_empl_lyr`rank' = .
}
forvalues top = 0/3 {
	forvalues rank = 0/`top' {
		replace norm_empl_lyr`rank' = empl_lyr`rank' / empl_lyr`top' if count_mgmt_est == `top'
	}
}

* Sum of the normalized employment over all ranks (missing values count as 0)
egen norm_empl = rowtotal(norm_empl*)

compress
save "data/CMRH_2012_cs_est_layer-est.dta", replace

********************************************************************************
********************************************************************************

log close
